# clear working environment
rm(list = ls())

#### load the required packages ####
library(tidyverse)
library(PanelMatch)
library(countrycode)
library(haven)

#### load the clean data ####
df <- readRDS("data/dem-transitions-replication-data.rds") %>%
  glimpse()

#### make df into a properly formatted dataframe for PanelMatch ####

# begin coercing df into m
# df needs to be a dataframe class
m <- as.data.frame(df)
# check to see if m is a dataframe
class(m)

# country needs to be an integer to use PanelMatch
m$country <- as.integer(m$country)

# year needs to be an integer to use PanelMatch
m$year <- as.integer(m$year)

# create treatment variables
m$tr <- as.numeric(ifelse(m$v2x_regime >= 2, 1, 0))
m$boix_tr <- as.numeric(ifelse(m$e_boix_regime == 1, 1, 0))
m$gwf_tr <- as.numeric(ifelse(m$gwf_nonautocracy == "democracy", 1, 0))

# all other variables, except categorical variables, need to be numeric
m$mid <- as.numeric(m$mid)
m$fatal <- as.numeric(m$fatal)
m$ln_cinc <- as.numeric(m$ln_cinc)
m$ln_pecpp <- as.numeric(m$ln_pecpp)
m$terr_dispute <- as.numeric(m$terr_dispute)

# make sure all of the rows are unique
m <- distinct(m, country, year, .keep_all = TRUE)

# set seed to reproduce bootstrapped estimates
set.seed(1)

# effect of democratic transition on conflict 
# create vector of different treatments to loop over
tr <- c("tr", "boix_tr", "gwf_tr")

# MID participation (pooled three years)
# output data frame
gg1 <- data.frame(estimates = NA,
                  boot_se = NA,
                  boot_lo = NA,
                  boot_hi = NA,
                  treatment = NA,
                  outcome = NA,
                  number_periods = NA)
                  
# loop
for (i in 1:3) {
  
  # create panel data
  panel <- PanelData(panel.data = m,
                     unit.id = "country",
                     time.id = "year",
                     treatment = tr[i],
                     outcome = "mid")
  
  # create matched sets
  PMresults.mid.att <- PanelMatch(panel.data = panel, lag = 5, refinement.method = "CBPS.weight",
                                  match.missing = TRUE, exact.match.variables = c("un_region"),
                                  covs.formula = ~ I(lag(mid, 1:5)) + I(lag(ln_cinc, 1:5)) + I(lag(ln_pecpp, 1:5)) + I(lag(terr_dispute, 1:5)),
                                  qoi = "att", lead = 1:3, 
                                  forbid.treatment.reversal = FALSE,
                                  use.diagonal.variance.matrix = TRUE)
  
  # estimate the ATT
  PEresults.mid.att <- PanelEstimate(panel.data = panel,
                                     sets = PMresults.mid.att,
                                     se.method = "bootstrap", number.iterations = 10000,
                                     confidence.level = .90,
                                     pooled = TRUE)
  
  
  gg1[i,] <- c(PEresults.mid.att[["estimate"]],
               PEresults.mid.att[["standard.error"]],
               quantile(PEresults.mid.att[["bootstrapped.estimates"]], probs = 0.05),
               quantile(PEresults.mid.att[["bootstrapped.estimates"]], probs = 0.95),
               tr[i],
               "All MIDs",
               "3")
  
}

# MID participation (pooled five years)
# output data frame
gg2 <- data.frame(estimates = NA,
                  boot_se = NA,
                  boot_lo = NA,
                  boot_hi = NA,
                  treatment = NA,
                  outcome = NA,
                  number_periods = NA)

# loop
for (i in 1:3) {
  
  # create panel data
  panel <- PanelData(panel.data = m,
                     unit.id = "country",
                     time.id = "year",
                     treatment = tr[i],
                     outcome = "mid")
  
  # create matched sets
  PMresults.mid.att <- PanelMatch(panel.data = panel, lag = 5, refinement.method = "CBPS.weight",
                                  match.missing = TRUE, exact.match.variables = c("un_region"),
                                  covs.formula = ~ I(lag(mid, 1:5)) + I(lag(ln_cinc, 1:5)) + I(lag(ln_pecpp, 1:5)) + I(lag(terr_dispute, 1:5)),
                                  qoi = "att", lead = 1:5, 
                                  forbid.treatment.reversal = FALSE,
                                  use.diagonal.variance.matrix = TRUE)
  
  # estimate the ATT
  PEresults.mid.att <- PanelEstimate(panel.data = panel, 
                                     sets = PMresults.mid.att,
                                     se.method = "bootstrap", number.iterations = 10000,
                                     confidence.level = .90,
                                     pooled = TRUE)
  
  # assemble data frame
  gg2[i,] <- c(PEresults.mid.att[["estimate"]],
               PEresults.mid.att[["standard.error"]],
               quantile(PEresults.mid.att[["bootstrapped.estimates"]], probs = 0.05),
               quantile(PEresults.mid.att[["bootstrapped.estimates"]], probs = 0.95),
               tr[i],
               "All MIDs",
               "5")
  
}

# Fatal MID participation (pooled three years)
# output data frame
gg3 <- data.frame(estimates = NA,
                  boot_se = NA,
                  boot_lo = NA,
                  boot_hi = NA,
                  treatment = NA,
                  outcome = NA,
                  number_periods = NA)

# loop
for (i in 1:3) {
  
  # create panel data
  panel <- PanelData(panel.data = m,
                     unit.id = "country",
                     time.id = "year",
                     treatment = tr[i],
                     outcome = "fatal")
  
  # create matched sets
  PMresults.fatal.att <- PanelMatch(panel.data = panel, lag = 5, refinement.method = "CBPS.weight",
                                  match.missing = TRUE, exact.match.variables = c("un_region"),
                                  covs.formula = ~ I(lag(fatal, 1:5)) + I(lag(ln_cinc, 1:5)) + I(lag(ln_pecpp, 1:5)) + I(lag(terr_dispute, 1:5)),
                                  qoi = "att", lead = 1:3,
                                  forbid.treatment.reversal = FALSE,
                                  use.diagonal.variance.matrix = TRUE)
  
  # estimate the ATT
  PEresults.fatal.att <- PanelEstimate(panel.data = panel, 
                                       sets = PMresults.fatal.att, 
                                       se.method = "bootstrap", number.iterations = 10000,
                                       confidence.level = .90,
                                       pooled = TRUE)
  
  # assemble data frame
  gg3[i,] <- c(PEresults.fatal.att[["estimate"]],
               PEresults.fatal.att[["standard.error"]],
               quantile(PEresults.fatal.att[["bootstrapped.estimates"]], probs = 0.05),
               quantile(PEresults.fatal.att[["bootstrapped.estimates"]], probs = 0.95),
               tr[i],
               "Fatal MIDs",
               "3")
  
}

# Fatal MID participation (pooled five years)
# output data frame
gg4 <- data.frame(estimates = NA,
                  boot_se = NA,
                  boot_lo = NA,
                  boot_hi = NA,
                  treatment = NA,
                  outcome = NA,
                  number_periods = NA)

# loop
for (i in 1:3) {
  
  # create panel data
  panel <- PanelData(panel.data = m,
                     unit.id = "country",
                     time.id = "year",
                     treatment = tr[i],
                     outcome = "fatal")
  
  # create matched sets
  PMresults.fatal.att <- PanelMatch(panel.data = panel, lag = 5, refinement.method = "CBPS.weight",
                                    match.missing = TRUE, exact.match.variables = c("un_region"),
                                    covs.formula = ~ I(lag(fatal, 1:5)) + I(lag(ln_cinc, 1:5)) + I(lag(ln_pecpp, 1:5)) + I(lag(terr_dispute, 1:5)),
                                    qoi = "att", lead = 1:5, 
                                    forbid.treatment.reversal = FALSE,
                                    use.diagonal.variance.matrix = TRUE)
  
  # estimate the ATT
  PEresults.fatal.att <- PanelEstimate(panel.data = panel,
                                       sets = PMresults.fatal.att, 
                                       se.method = "bootstrap", number.iterations = 10000,
                                       confidence.level = .90,
                                       pooled = TRUE)
  
  # assemble data frame
  gg4[i,] <- c(PEresults.fatal.att[["estimate"]],
               PEresults.fatal.att[["standard.error"]],
               quantile(PEresults.fatal.att[["bootstrapped.estimates"]], probs = 0.05),
               quantile(PEresults.fatal.att[["bootstrapped.estimates"]], probs = 0.95),
               tr[i],
               "Fatal MIDs",
               "5")
  
}

# recode treatment variable
gg <- rbind(gg1, gg2, gg3, gg4) %>%
  mutate(treatment = case_when(treatment == "tr" ~ "V-Dem",
                               treatment == "boix_tr" ~ "Boix et al. (2013)",
                               treatment == "gwf_tr" ~ "Geddes et al. (2014)")) %>%
  glimpse()

# make values numeric
gg$estimates <- as.numeric(gg$estimates)
gg$boot_lo <- as.numeric(gg$boot_lo)
gg$boot_hi <- as.numeric(gg$boot_hi)

# create Figure I.1 in the Appendix
ggplot(data = gg) +
  geom_hline(aes(yintercept = 0), linetype = "dashed", linewidth = .25) +
  geom_pointrange(aes(x = outcome, y = estimates, ymin = boot_lo, ymax = boot_hi, shape = number_periods, color = number_periods), size = .4, linewidth = .70, position = position_dodge(width = .2)) +
  scale_color_manual(values = c("black", "darkgray")) +
  ylim(-.25, .1) +
  facet_wrap(~treatment) +
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.text.x = element_text(color = "black", size = 10),
        axis.title.y = element_text(size = 12),
        legend.title = element_text(size = 12),
        strip.text.x = element_text(size = 12)) +
  labs(y = "Effect of Democratic Transition on Pr(Conflict)",
       x = "",
       color = "Years Since Transition (Pooled)",
       shape = "Years Since Transition (Pooled)")

# save plot
ggsave("figures/different-measures-pooled-mid-att-plot.pdf", width = 7.5, height = 5, units = "in", bg = "white")
